// Data Preparation
// ----------------------------

use "${Data_Clean}/EVSales_Panel_Data_distance_matched_CBG.dta", clear
sort groupid group_pair_id year month adjacent

* Drop unnecessay variables
drop rowid colid eiaid fixedcharge startdate enddate manually_ WkdyAvgEVRateT2-WkdyAvgEVRateT4 ///
	EVRateOffPeakTier2-EVRateOffPeakTier4 EVRatePeakTier2-EVRatePeakTier4 EVRateShoulderTier2-EVRateShoulderTier4 ///
	pastlastupdate lastlastupdate_EV 
	
rename SellPrice1 P_BEV
rename SaleNumber1 Q_BEV
rename SellPrice2 P_PHEV
rename SaleNumber2 Q_PHEV
rename zipcode_primary zip
rename mean_income income
rename census_bg_area area
rename retailaverage P_gas

foreach x in 1 2 3 5 10 {
	rename simpleaveprice_lt`x' simpleaveprice_lt`x'_ 
	rename obswgtprice_lt`x' obswgtprice_lt`x'_
	rename invdistwgtprice_lt`x' invdistwgtprice_lt`x'_
}

gen PQ_BEV = P_BEV*Q_BEV
gen PQ_PHEV = P_PHEV*Q_PHEV

* Generate auto class shares
gen hybrid_share = hybrid_count / total_car_count
gen luxury_share = luxury_count / total_car_count
gen mud_hh_share = total_mud / total_hh

* Generate price variable for electricity rates
gen ActivePrice = HighestTierAvgRate
replace ActivePrice = AvgRateTier1 if (utility_name == "Pacific Gas & Electric Company" & (year == 2017 & month>=1))
replace ActivePrice = AvgRateTier1 if (utility_name == "San Diego Gas & Electric" & (year == 2017 | (year==2016 & month>=7)))
replace ActivePrice = AvgRateTier1 if (utility_name == "Southern California Edison" & (year == 2017 & month>=1))

* Look at average price over the sample period
tab modate utility_name if (utility_name == "Pacific Gas & Electric Company"  | utility_name == "San Diego Gas & Electric" |utility_name == "Southern California Edison"), sum(ActivePrice) nost nofreq

* Add kWh cutoff

forvalues x = 0/3 {
	bys utility_name: egen MT`x'usage = mean(Tier`x'UsageAmt)
	replace Tier`x'UsageAmt = MT`x'usage if Tier`x'UsageAmt==.
	drop MT`x'usage
}


foreach kwhcutoff in 600 700 750 800 900 1000 {
gen KWH`kwhcutoff'MP = AvgRateTier0
forvalues x = 1 / 4 {	
	local y = `x' - 1
	replace KWH`kwhcutoff'MP = AvgRateTier`x' if (`kwhcutoff' > Tier`y'UsageAmt & `kwhcutoff' <= Tier`x'UsageAmt)
}
}



* Add average prices for different levels of consumption
foreach kwhcutoff in 600 750 900 {
gen KWH`kwhcutoff'AP = AvgRateTier0*`kwhcutoff'
replace KWH`kwhcutoff'AP = AvgRateTier0*Tier0UsageAmt + AvgRateTier1*(`kwhcutoff'-Tier0UsageAmt) if (`kwhcutoff' > Tier0UsageAmt)
replace KWH`kwhcutoff'AP = AvgRateTier0*Tier0UsageAmt + AvgRateTier1*(Tier1UsageAmt-Tier0UsageAmt) +  AvgRateTier2*(`kwhcutoff'-Tier1UsageAmt) if (`kwhcutoff' > Tier1UsageAmt)
replace KWH`kwhcutoff'AP =  AvgRateTier0*Tier0UsageAmt + AvgRateTier1*(Tier1UsageAmt-Tier0UsageAmt) + AvgRateTier2*(Tier2UsageAmt-Tier1UsageAmt) + AvgRateTier3*(`kwhcutoff'-Tier2UsageAmt) if (`kwhcutoff' > Tier2UsageAmt)
replace KWH`kwhcutoff'AP = AvgRateTier0*Tier0UsageAmt + AvgRateTier1*(Tier1UsageAmt-Tier0UsageAmt) + AvgRateTier2*(Tier2UsageAmt-Tier1UsageAmt) + AvgRateTier3*(Tier3UsageAmt-Tier2UsageAmt) + AvgRateTier4*(`kwhcutoff'-Tier3UsageAmt) if (`kwhcutoff' > Tier3UsageAmt)

replace KWH`kwhcutoff'AP=KWH`kwhcutoff'AP/`kwhcutoff'
}

tab utility_name, sum(KWH600AP)
tab utility_name, sum(KWH750AP)
tab utility_name, sum(KWH900AP)


drop if adjacent==.
drop modate number_price_obs number_station_obs climate_zone standard_count-prius_count total_single total_mud _merge stationcount_lt*

* Spread to make pairs wide
reshape wide P_BEV Q_BEV P_PHEV Q_PHEV PQ_BEV PQ_PHEV cbg distance* utility_name income population area fuel_economy_mean pop_density total_hh hybrid_share luxury_share mud_hh_share ///
	AvgRate* Tier* KWH* MaxTier* HighestTier* Wkdy* EVRate* zip region P_gas invdistwgtprice* simpleaveprice* obswgtprice* ActivePrice, i(groupid group_pair_id year month) j(adjacent)

* Make the data annual	
collapse (sum) Q_BEV* Q_PHEV* PQ_PHEV* PQ_BEV* (mean) cbg0 cbg1 distance* income* population* area* fuel_economy_mean* pop_density* total_hh* hybrid_share* luxury_share* mud_hh_share* ///
	AvgRate* Tier* MaxTier*  KWH* HighestTier* Wkdy* EVRate* P_gas* invdistwgtprice* simpleaveprice* obswgtprice* zip* ActivePrice*, by(groupid group_pair_id year utility_name* region*) 
	
*Restrict sample s.t. reference is IOUs only (later we run vice-versa)
* Add the dummies for the three CA IOUs
gen ref_PGE = (utility_name0 == "Pacific Gas & Electric Company")
gen ref_SCE = (utility_name0 == "Southern California Edison")
gen ref_SDGE = (utility_name0 == "San Diego Gas & Electric")
gen adj_PGE = (utility_name1 == "Pacific Gas & Electric Company")
gen adj_SCE = (utility_name1 == "Southern California Edison")
gen adj_SDGE = (utility_name1 == "San Diego Gas & Electric")

gen ref_IOU = ref_PGE + ref_SCE + ref_SDGE
gen adj_IOU = adj_PGE + adj_SCE + adj_SDGE

gen PGE_dum = ref_PGE - adj_PGE
gen SCE_dum = ref_SCE - adj_SCE
gen SDGE_dum = ref_SDGE - adj_SDGE

* Top tier prices
rename HighestTierAvgRate0 p_e_high0
rename HighestTierAvgRate1 p_e_high1

rename ActivePrice0 p_e_active0
rename ActivePrice1 p_e_active1

local pop_var "population"
gen share_BEV0 = Q_BEV0/(`pop_var'0/10000)
gen share_BEV1 = Q_BEV1/(`pop_var'1/10000)
gen share_PHEV0 = Q_PHEV0/(`pop_var'0/10000)
gen share_PHEV1 = Q_PHEV1/(`pop_var'1/10000)

foreach i in population pop_density income hybrid_share luxury_share mud_hh_share fuel_economy_mean p_e_high p_e_active P_gas ///
invdistwgtprice_lt1_ invdistwgtprice_lt2_ invdistwgtprice_lt3_ invdistwgtprice_lt5_ invdistwgtprice_lt10_ ///
simpleaveprice_lt1_ simpleaveprice_lt2_ simpleaveprice_lt3_ simpleaveprice_lt5_ simpleaveprice_lt10_ ///
obswgtprice_lt1_ obswgtprice_lt2_ obswgtprice_lt3_ obswgtprice_lt5_ obswgtprice_lt10_ ///
KWH750AP KWH750MP KWH600AP KWH600MP KWH900AP KWH900MP share_BEV share_PHEV {
	gen delta_`i' = `i'0-`i'1
	twoway histogram delta_`i'
}

/*Put prices in cents per KWH*/
foreach i of varlist delta_p_e* delta_KWH* {
	replace `i' = `i'*100
}

/*Put dollars and population in 000s*/
foreach i of varlist delta_population delta_income delta_pop_density {
	replace `i' = `i'/1000
}

/*Tag all CBG pairs that appear in appear when matching in both directions*/
gen double first_cbg = cbg0 if cbg0<cbg1
replace first_cbg = cbg1 if cbg1<cbg0
gen double second_cbg = cbg1 if cbg0<cbg1
replace second_cbg = cbg0 if cbg1<cbg0
duplicates tag year first_cbg second_cbg, gen(paircount)
sort year first_cbg second_cbg
gen duplicate_drop = (first_cbg[_n]==first_cbg[_n-1] & second_cbg[_n]==second_cbg[_n-1]&year[_n]==year[_n-1])
/*Generate distance on the IOU / non-IOU side of border*/


/*Set of group ids for each pair of utilities regardless of which is the reference*/
gen first_util = utility_name0 if utility_name0<utility_name1
replace first_util = utility_name1 if utility_name1<utility_name0
gen second_util = utility_name1 if utility_name0<utility_name1
replace second_util = utility_name0 if utility_name1<utility_name0
egen pairid = group(first_util second_util)

/*Generate distance on the IOU / non-IOU side of border*/

gen cons=1
gen IOU_dist = .
gen muni_dist = .

replace IOU_dist = distance_util0 if ref_IOU==1
replace IOU_dist = -distance_util1 if adj_IOU==1

replace muni_dist = distance_util0 if ref_IOU==0
replace muni_dist = -distance_util1 if adj_IOU==0


label var delta_population "$\Delta$ Population (000s)"
label var delta_pop_density "$\Delta$ Pop Density (000s ppl/sqm)"
label var delta_income "$\Delta$ Income (\\$000)"
label var delta_P_gas "$\Delta$ Gas Price (\\$/gal)"
label var delta_p_e_active "$\Delta$ Marg. Price (cents/kwh)"
label var delta_p_e_high "$\Delta$ Marg. Price - Top Tier (cents/kwh)"


foreach kwhcutoff in  600 750 900 {
label var  delta_KWH`kwhcutoff'MP "$\Delta$ Marg. Price at `kwhcutoff' kwh"
label var delta_KWH`kwhcutoff'AP "$\Delta$ Avg Price for `kwhcutoff' kwhs"
}

label var delta_fuel_economy_mean "$\Delta$ Mean Fuel Econ (mpg, 2013)"
label var delta_hybrid_share "$\Delta$ Hybrid Fleet Share (2013)"
label var delta_luxury_share "$\Delta$ Luxury Fleet Share (2013)"
label var delta_mud_hh_share "$\Delta$ MUD HH share (2013)"

/*Put gasoline in cents per gallon*/
foreach i of varlist delta_P_gas  delta_simpleave* delta_invdist* delta_obswgt* {
	replace `i' = `i'*100
	label var `i' "$\Delta$ Gas Price (cpg)" 
}

save "${Data_Clean}/spatial_RD_regdata.dta", replace

keep cbg0 cbg1
gen i = _n
reshape long cbg, i(i) j(index)
drop i index
duplicates drop

save "${Data_Clean}/RD_cbglist.dta", replace



